Assignment: RNN and ConvNets (60 points)¶
The data file ”data.csv” contains 3 time series x1, x2, and y along with the corresponding date column. The data ranges from beginning of 2019 to the end of Feb. of 2020. The objective of this problem is to make predictions for y for March 1st and 2nd in 2020.
Kai Hsin Hung | Harshitha Mallappa
1. Explore regular feedforward neural network models for this problem. (10points)¶
In [1]:
import pandas as pd
import numpy as np
In [2]:
data = pd.read_csv('timeseriesData.csv')
data.head()
Out[2]:
| Date | x1 | x2 | y | |
|---|---|---|---|---|
| 0 | 1/1/19 | 51.0 | 5.550000 | 65.58 |
| 1 | 1/2/19 | 51.0 | 8.950000 | 65.35 |
| 2 | 1/3/19 | 43.0 | 7.033333 | 69.80 |
| 3 | 1/4/19 | 43.0 | 7.033333 | 69.76 |
| 4 | 1/5/19 | 53.0 | 4.950000 | 70.48 |
In [3]:
data.info()
missing_val = data.isna().sum()
print(f'Missing val:\n{missing_val}')
<class 'pandas.core.frame.DataFrame'> RangeIndex: 427 entries, 0 to 426 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Date 425 non-null object 1 x1 387 non-null float64 2 x2 387 non-null float64 3 y 387 non-null float64 dtypes: float64(3), object(1) memory usage: 13.5+ KB Missing val: Date 2 x1 40 x2 40 y 40 dtype: int64
In [4]:
# data imputation for Na values
for col in ['x1', 'x2', 'y', 'Date']:
data[col] = data[col].ffill()
missing_val = data.isna().sum()
print(f'Missing val:\n{missing_val}')
# Change Date dtype ot Date and sort it
data['Date'] = pd.to_datetime(data['Date'])
data = data.sort_values('Date')
feature = ['x1', 'x2']
target = 'y'
Missing val: Date 0 x1 0 x2 0 y 0 dtype: int64
/var/folders/f8/_j25ckdn20vbvtb_vyn8bz0r0000gn/T/ipykernel_14288/2962500903.py:8: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format. data['Date'] = pd.to_datetime(data['Date'])
In [5]:
data
Out[5]:
| Date | x1 | x2 | y | |
|---|---|---|---|---|
| 0 | 2019-01-01 | 51.0 | 5.550000 | 65.58 |
| 1 | 2019-01-02 | 51.0 | 8.950000 | 65.35 |
| 2 | 2019-01-03 | 43.0 | 7.033333 | 69.80 |
| 3 | 2019-01-04 | 43.0 | 7.033333 | 69.76 |
| 4 | 2019-01-05 | 53.0 | 4.950000 | 70.48 |
| ... | ... | ... | ... | ... |
| 422 | 2020-02-27 | 19.0 | 5.483333 | 83.62 |
| 423 | 2020-02-28 | 19.0 | 5.483333 | 83.62 |
| 424 | 2020-02-29 | 65.0 | 6.183333 | 68.53 |
| 425 | 2020-02-29 | 65.0 | 6.183333 | 68.53 |
| 426 | 2020-02-29 | 65.0 | 6.183333 | 68.53 |
427 rows × 4 columns
In [6]:
# split the data set, portion: 70% of training 15% of validation 15% of tes
X = data[feature].values
y = data[target].values
train_portion = round(X.shape[0] * 0.7)
val_portion = round(X.shape[0] * 0.15)
train_data_y = y[:train_portion].reshape(-1, 1)
val_data_y = y[train_portion:train_portion+val_portion].reshape(-1, 1)
test_data_y = y[train_portion+val_portion:].reshape(-1, 1)
# ensure the portion of dataset
print(f"training: {len(train_data_y)}, val: {len(val_data_y)}, test: {len(test_data_y)}")
training: 299, val: 64, test: 64
In [7]:
# preprocessing
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0, 1))
sc.fit(train_data_y)
train_norm = sc.transform(train_data_y)
val_norm = sc.transform(val_data_y)
test_norm = sc.transform(test_data_y)
In [8]:
# convert sequence function
def to_sequence(data, look_back, foresight):
X, Y = [], []
for i in range(len(data) - (look_back + foresight)):
look_back_seq = data[i:(i+look_back), 0]
foresight_seq = (data[i + (look_back+foresight), 0])
X.append(look_back_seq)
Y.append(foresight_seq)
return np.array(X), np.array(Y)
In [9]:
# setting look back and foresight like the lecture
train_seqX, train_seqY = to_sequence(train_norm, look_back=7, foresight=6)
val_seqX, val_seqY = to_sequence(val_norm, look_back=7, foresight=6)
test_seqX, test_seqY = to_sequence(test_norm, look_back=7, foresight=6)
In [10]:
from keras.models import Sequential
from keras.layers import Dense, Input
feedforward_model = Sequential()
# set input_shape = 7 to align look_back number
# Dense model only have 2D input
feedforward_model.add(Input(shape = (7, )))
feedforward_model.add(Dense(64, activation='relu'))
feedforward_model.add(Dense(32, activation='relu'))
feedforward_model.add(Dense(1, activation='linear'))
feedforward_model.compile(loss='mae', optimizer='adam', metrics=['mean_absolute_error'])
feedforward_model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ dense (Dense) │ (None, 64) │ 512 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense_1 (Dense) │ (None, 32) │ 2,080 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense_2 (Dense) │ (None, 1) │ 33 │ └─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 2,625 (10.25 KB)
Trainable params: 2,625 (10.25 KB)
Non-trainable params: 0 (0.00 B)
In [11]:
from keras.callbacks import EarlyStopping
checkpoint = EarlyStopping(monitor='val_loss', patience=5, mode='auto', restore_best_weights = True)
callback_list = [checkpoint]
feedforward_network = feedforward_model.fit(train_seqX, train_seqY, validation_data=(val_seqX, val_seqY),
epochs = 100, batch_size = 64, callbacks = callback_list)
Epoch 1/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 13ms/step - loss: 0.0340 - mean_absolute_error: 0.0340 - val_loss: 0.0122 - val_mean_absolute_error: 0.0122 Epoch 2/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0154 - mean_absolute_error: 0.0154 - val_loss: 0.0145 - val_mean_absolute_error: 0.0145 Epoch 3/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0219 - mean_absolute_error: 0.0219 - val_loss: 0.0117 - val_mean_absolute_error: 0.0117 Epoch 4/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0181 - mean_absolute_error: 0.0181 - val_loss: 0.0123 - val_mean_absolute_error: 0.0123 Epoch 5/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0137 - mean_absolute_error: 0.0137 - val_loss: 0.0115 - val_mean_absolute_error: 0.0115 Epoch 6/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0179 - mean_absolute_error: 0.0179 - val_loss: 0.0117 - val_mean_absolute_error: 0.0117 Epoch 7/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0151 - mean_absolute_error: 0.0151 - val_loss: 0.0115 - val_mean_absolute_error: 0.0115 Epoch 8/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 0.0176 - mean_absolute_error: 0.0176 - val_loss: 0.0115 - val_mean_absolute_error: 0.0115 Epoch 9/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0117 - mean_absolute_error: 0.0117 - val_loss: 0.0114 - val_mean_absolute_error: 0.0114 Epoch 10/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0170 - mean_absolute_error: 0.0170 - val_loss: 0.0113 - val_mean_absolute_error: 0.0113 Epoch 11/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0143 - mean_absolute_error: 0.0143 - val_loss: 0.0114 - val_mean_absolute_error: 0.0114 Epoch 12/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0132 - mean_absolute_error: 0.0132 - val_loss: 0.0113 - val_mean_absolute_error: 0.0113 Epoch 13/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0115 - mean_absolute_error: 0.0115 - val_loss: 0.0114 - val_mean_absolute_error: 0.0114 Epoch 14/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0171 - mean_absolute_error: 0.0171 - val_loss: 0.0114 - val_mean_absolute_error: 0.0114 Epoch 15/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0133 - mean_absolute_error: 0.0133 - val_loss: 0.0114 - val_mean_absolute_error: 0.0114 Epoch 16/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0126 - mean_absolute_error: 0.0126 - val_loss: 0.0113 - val_mean_absolute_error: 0.0113 Epoch 17/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0167 - mean_absolute_error: 0.0167 - val_loss: 0.0113 - val_mean_absolute_error: 0.0113 Epoch 18/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0131 - mean_absolute_error: 0.0131 - val_loss: 0.0114 - val_mean_absolute_error: 0.0114 Epoch 19/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step - loss: 0.0162 - mean_absolute_error: 0.0162 - val_loss: 0.0114 - val_mean_absolute_error: 0.0114 Epoch 20/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0140 - mean_absolute_error: 0.0140 - val_loss: 0.0114 - val_mean_absolute_error: 0.0114 Epoch 21/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0116 - mean_absolute_error: 0.0116 - val_loss: 0.0114 - val_mean_absolute_error: 0.0114
In [12]:
from sklearn.metrics import mean_absolute_error
test_norm_predict = feedforward_model.predict(test_seqX)
# convert back to unnormalize
test_predict = sc.inverse_transform(test_norm_predict)
testY = sc.inverse_transform(test_seqY.reshape(-1, 1))
test_Mae = mean_absolute_error(testY, test_predict)
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step
In [13]:
import plotly.graph_objects as go
fig = go.Figure()
fig.add_trace(go.Scatter(y = feedforward_network.history['loss'], mode = 'lines', name='Training error'))
fig.add_trace(go.Scatter(y = feedforward_network.history['val_loss'], mode='lines', name='Validation error'))
fig.update_layout(xaxis_title = 'Epochs', yaxis_title = 'Mean Absolute error', title_text=f'Unnormalized MAE = {test_Mae:.3f}')
fig.show()
(c) What are the predicted values of y for March 1st and March 2nd?¶
In [14]:
# get last 7 data due to look back setting to 7
data_norm = sc.transform(y.reshape(-1, 1))
last_7_values = data_norm[-7:].reshape(1, 7)
pred_march1_norm = feedforward_model.predict(last_7_values)
pred_march1_feedforward = sc.inverse_transform(pred_march1_norm)
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step
In [15]:
last_pred_combine = np.concatenate([data_norm.flatten(), pred_march1_norm.flatten()])
updated_last_7_values = last_pred_combine[-7:].reshape(1, 7)
pred_march2_norm = feedforward_model.predict(updated_last_7_values)
pred_march2_feedforward = sc.inverse_transform(pred_march2_norm)
print(f"Feed forward model prediction for March 1st {pred_march1_feedforward[0][0]:.3f}, prediction for March 2nd {pred_march2_feedforward[0][0]:.3f}")
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step Feed forward model prediction for March 1st 71.550, prediction for March 2nd 72.606
2. Explore recurrent neural network models for this problem. (10 points)¶
LSTM¶
In [16]:
from keras.layers import LSTM
LSTM_model = Sequential()
LSTM_model.add(Input(shape=(7, 1)))
LSTM_model.add(LSTM(32, dropout = 0.1, recurrent_dropout = 0.1))
LSTM_model.add(Dense(1, activation='linear'))
LSTM_model.compile(loss='mae', optimizer='adam', metrics=['mean_absolute_error'])
LSTM_model.summary()
Model: "sequential_1"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ lstm (LSTM) │ (None, 32) │ 4,352 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense_3 (Dense) │ (None, 1) │ 33 │ └─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 4,385 (17.13 KB)
Trainable params: 4,385 (17.13 KB)
Non-trainable params: 0 (0.00 B)
In [17]:
checkpoint = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto', restore_best_weights=True)
callback_list = [checkpoint]
LSTM_network = LSTM_model.fit(train_seqX, train_seqY, validation_data=(val_seqX, val_seqY),
epochs = 100, batch_size = 64, callbacks = callback_list)
Epoch 1/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 1s 32ms/step - loss: 0.0228 - mean_absolute_error: 0.0228 - val_loss: 0.0131 - val_mean_absolute_error: 0.0131 Epoch 2/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - loss: 0.0196 - mean_absolute_error: 0.0196 - val_loss: 0.0138 - val_mean_absolute_error: 0.0138 Epoch 3/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0182 - mean_absolute_error: 0.0182 - val_loss: 0.0121 - val_mean_absolute_error: 0.0121 Epoch 4/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0136 - mean_absolute_error: 0.0136 - val_loss: 0.0135 - val_mean_absolute_error: 0.0135 Epoch 5/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0157 - mean_absolute_error: 0.0157 - val_loss: 0.0119 - val_mean_absolute_error: 0.0119 Epoch 6/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0159 - mean_absolute_error: 0.0159 - val_loss: 0.0124 - val_mean_absolute_error: 0.0124 Epoch 7/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0129 - mean_absolute_error: 0.0129 - val_loss: 0.0118 - val_mean_absolute_error: 0.0118 Epoch 8/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0150 - mean_absolute_error: 0.0150 - val_loss: 0.0122 - val_mean_absolute_error: 0.0122 Epoch 9/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0131 - mean_absolute_error: 0.0131 - val_loss: 0.0118 - val_mean_absolute_error: 0.0118 Epoch 10/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0127 - mean_absolute_error: 0.0127 - val_loss: 0.0121 - val_mean_absolute_error: 0.0121 Epoch 11/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0148 - mean_absolute_error: 0.0148 - val_loss: 0.0119 - val_mean_absolute_error: 0.0119 Epoch 12/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0131 - mean_absolute_error: 0.0131 - val_loss: 0.0120 - val_mean_absolute_error: 0.0120 Epoch 12: early stopping Restoring model weights from the end of the best epoch: 7.
In [18]:
test_norm_predict = LSTM_model.predict(test_seqX)
test_predict = sc.inverse_transform(test_norm_predict)
testY = sc.inverse_transform(test_seqY.reshape(-1, 1))
testLSTM_Mae = mean_absolute_error(testY, test_predict)
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 76ms/step
In [19]:
fig = go.Figure()
fig.add_trace(go.Scatter(y=LSTM_network.history['loss'], mode='lines', name='Training error'))
fig.add_trace(go.Scatter(y=LSTM_network.history['val_loss'], mode='lines', name='Validation error'))
fig.update_layout(xaxis_title = 'Epochs', yaxis_title = 'Mean absolute error', title_text = f'Unnormalized MAE = {testLSTM_Mae:.3f}')
fig.show()
(c) What are the predicted values of y for March 1st and March 2nd?¶
In [20]:
data_norm = sc.transform(y.reshape(-1, 1))
last_7_values = data_norm[-7:].reshape(1, 7)
pred_march1_norm = LSTM_model.predict(last_7_values)
pred_march1_LSTM = sc.inverse_transform(pred_march1_norm)
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step
In [21]:
last_pred_combine = np.concatenate([data_norm.flatten(), pred_march1_norm.flatten()])
updated_last_7_values = last_pred_combine[-7:].reshape(1, 7)
pred_march2_norm = LSTM_model.predict(updated_last_7_values)
pred_march2_LSTM = sc.inverse_transform(pred_march2_norm)
print(f"LSTM model prediction for March 1st {pred_march1_LSTM[0][0]:.3f}, prediction for March 2nd {pred_march2_LSTM[0][0]:.3f}")
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step LSTM model prediction for March 1st 72.899, prediction for March 2nd 72.956
GRU¶
In [22]:
from keras.layers import GRU
GRU_model = Sequential()
GRU_model.add(Input(shape=(7, 1)))
GRU_model.add(GRU(32, dropout = 0.1, recurrent_dropout = 0.1))
GRU_model.add(Dense(1, activation='linear'))
GRU_model.compile(loss='mae', optimizer='adam', metrics=['mean_absolute_error'])
In [23]:
checkpoint = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto', restore_best_weights=True)
callback_list = [checkpoint]
GRU_model_network = GRU_model.fit(train_seqX, train_seqY, validation_data=(val_seqX, val_seqY),
epochs = 100, batch_size = 64, callbacks = callback_list)
Epoch 1/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 1s 32ms/step - loss: 0.0200 - mean_absolute_error: 0.0200 - val_loss: 0.0148 - val_mean_absolute_error: 0.0148 Epoch 2/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0153 - mean_absolute_error: 0.0153 - val_loss: 0.0125 - val_mean_absolute_error: 0.0125 Epoch 3/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0149 - mean_absolute_error: 0.0149 - val_loss: 0.0117 - val_mean_absolute_error: 0.0117 Epoch 4/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0143 - mean_absolute_error: 0.0143 - val_loss: 0.0118 - val_mean_absolute_error: 0.0118 Epoch 5/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0128 - mean_absolute_error: 0.0128 - val_loss: 0.0116 - val_mean_absolute_error: 0.0116 Epoch 6/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0144 - mean_absolute_error: 0.0144 - val_loss: 0.0116 - val_mean_absolute_error: 0.0116 Epoch 7/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0155 - mean_absolute_error: 0.0155 - val_loss: 0.0117 - val_mean_absolute_error: 0.0117 Epoch 8/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0157 - mean_absolute_error: 0.0157 - val_loss: 0.0115 - val_mean_absolute_error: 0.0115 Epoch 9/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0154 - mean_absolute_error: 0.0154 - val_loss: 0.0116 - val_mean_absolute_error: 0.0116 Epoch 10/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0157 - mean_absolute_error: 0.0157 - val_loss: 0.0116 - val_mean_absolute_error: 0.0116 Epoch 11/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0149 - mean_absolute_error: 0.0149 - val_loss: 0.0115 - val_mean_absolute_error: 0.0115 Epoch 12/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0138 - mean_absolute_error: 0.0138 - val_loss: 0.0116 - val_mean_absolute_error: 0.0116 Epoch 13/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0134 - mean_absolute_error: 0.0134 - val_loss: 0.0116 - val_mean_absolute_error: 0.0116 Epoch 14/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0146 - mean_absolute_error: 0.0146 - val_loss: 0.0115 - val_mean_absolute_error: 0.0115 Epoch 15/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0139 - mean_absolute_error: 0.0139 - val_loss: 0.0115 - val_mean_absolute_error: 0.0115 Epoch 16/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0124 - mean_absolute_error: 0.0124 - val_loss: 0.0116 - val_mean_absolute_error: 0.0116 Epoch 16: early stopping Restoring model weights from the end of the best epoch: 11.
In [24]:
test_norm_predict = GRU_model.predict(test_seqX)
print(test_norm_predict.shape)
test_predict = sc.inverse_transform(test_norm_predict)
testY = sc.inverse_transform(test_seqY.reshape(-1, 1))
test_GRUMae = mean_absolute_error(testY, test_predict)
WARNING:tensorflow:5 out of the last 9 calls to <function TensorFlowTrainer.make_predict_function.<locals>.one_step_on_data_distributed at 0x17a560160> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. 1/2 ━━━━━━━━━━━━━━━━━━━━ 0s 79ms/stepWARNING:tensorflow:6 out of the last 10 calls to <function TensorFlowTrainer.make_predict_function.<locals>.one_step_on_data_distributed at 0x17a560160> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. 2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 78ms/step (51, 1)
In [25]:
fig = go.Figure()
fig.add_trace(go.Scatter(y=GRU_model_network.history['loss'], mode='lines', name='Traning error'))
fig.add_trace(go.Scatter(y=GRU_model_network.history['val_loss'], mode='lines', name='Validation error'))
fig.update_layout(xaxis_title = 'Epochs', yaxis_title = 'Mean absolute error', title_text = f'Unnormalized MAE: {test_GRUMae:.3f}')
(c) What are the predicted values of y for March 1st and March 2nd?¶
In [26]:
data_norm = sc.transform(y.reshape(-1, 1))
last_7_values = data_norm[-7:]
pred_march1_norm = GRU_model.predict(last_7_values)
pred_march1_GRU = sc.inverse_transform(pred_march1_norm)
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 116ms/step
In [27]:
last_pred_combine = np.concatenate([data_norm.flatten(), pred_march1_norm.flatten()])
updated_last_7_values = last_pred_combine[-7:].reshape(1, 7)
pred_march2_norm = GRU_model.predict(updated_last_7_values)
pred_march2_GRU = sc.inverse_transform(pred_march2_norm)
print(f"Prediction for March 1st {pred_march1_GRU[0][0]:.3f}, prediction for March 2nd {pred_march2_GRU[0][0]:.3f}")
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step Prediction for March 1st 66.749, prediction for March 2nd 72.250
In [28]:
print('Compare LSTM and GRU')
print(f'LSTM:\nMAE: {testLSTM_Mae:.3f}\nMarch 1st: {pred_march1_LSTM[0][0]:.3f} March 2nd: {pred_march2_LSTM[0][0]:.3f}')
print(f'GRU:\nMAE: {test_GRUMae:.3f}\nMarch 1st: {pred_march1_GRU[0][0]:.3f} March 2nd: {pred_march2_GRU[0][0]:.3f}')
Compare LSTM and GRU LSTM: MAE: 6.723 March 1st: 72.899 March 2nd: 72.956 GRU: MAE: 6.925 March 1st: 66.749 March 2nd: 72.250
3. Explore 1d convolutional neural network models for this problem. (10 points)¶
In [29]:
from keras.layers import Conv1D,MaxPool1D, Flatten
conv_model = Sequential()
# kernel size = the step of model going throuhg
# pool size = taking max of the number
conv_model.add(Input(shape=(7, 1)))
conv_model.add(Conv1D(filters=32, kernel_size=1, activation='relu'))
conv_model.add(MaxPool1D(pool_size=2))
conv_model.add(Flatten())
conv_model.add(Dense(64, activation='relu'))
conv_model.add(Dense(1, activation='linear'))
conv_model.compile(loss='mae', optimizer='adam', metrics=['mean_absolute_error'])
In [30]:
checkpoint = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto', restore_best_weights=True)
callback_list = [checkpoint]
conv_model_network = conv_model.fit(train_seqX, train_seqY, validation_data=(val_seqX, val_seqY),
epochs = 100, batch_size = 64, callbacks = callback_list)
Epoch 1/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 14ms/step - loss: 0.0206 - mean_absolute_error: 0.0206 - val_loss: 0.0140 - val_mean_absolute_error: 0.0140 Epoch 2/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0156 - mean_absolute_error: 0.0156 - val_loss: 0.0136 - val_mean_absolute_error: 0.0136 Epoch 3/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0143 - mean_absolute_error: 0.0143 - val_loss: 0.0123 - val_mean_absolute_error: 0.0123 Epoch 4/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0135 - mean_absolute_error: 0.0135 - val_loss: 0.0126 - val_mean_absolute_error: 0.0126 Epoch 5/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0140 - mean_absolute_error: 0.0140 - val_loss: 0.0122 - val_mean_absolute_error: 0.0122 Epoch 6/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0180 - mean_absolute_error: 0.0180 - val_loss: 0.0120 - val_mean_absolute_error: 0.0120 Epoch 7/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0119 - mean_absolute_error: 0.0119 - val_loss: 0.0121 - val_mean_absolute_error: 0.0121 Epoch 8/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0129 - mean_absolute_error: 0.0129 - val_loss: 0.0119 - val_mean_absolute_error: 0.0119 Epoch 9/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0130 - mean_absolute_error: 0.0130 - val_loss: 0.0119 - val_mean_absolute_error: 0.0119 Epoch 10/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0170 - mean_absolute_error: 0.0170 - val_loss: 0.0121 - val_mean_absolute_error: 0.0121 Epoch 11/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0151 - mean_absolute_error: 0.0151 - val_loss: 0.0121 - val_mean_absolute_error: 0.0121 Epoch 12/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0120 - mean_absolute_error: 0.0120 - val_loss: 0.0123 - val_mean_absolute_error: 0.0123 Epoch 13/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0147 - mean_absolute_error: 0.0147 - val_loss: 0.0123 - val_mean_absolute_error: 0.0123 Epoch 14/100 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.0172 - mean_absolute_error: 0.0172 - val_loss: 0.0120 - val_mean_absolute_error: 0.0120 Epoch 14: early stopping Restoring model weights from the end of the best epoch: 9.
In [31]:
test_norm_predict = conv_model.predict(test_seqX)
test_predict = sc.inverse_transform(test_norm_predict)
testconv_Mae = mean_absolute_error(testY, test_predict)
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 15ms/step
In [32]:
fig = go.Figure()
fig.add_trace(go.Scatter(y=conv_model_network.history['loss'], mode='lines', name='Training error'))
fig.add_trace(go.Scatter(y=conv_model_network.history['val_loss'], mode='lines', name='Validation error'))
fig.update_layout(xaxis_title = 'Epochs', yaxis_title = 'Mean absolute error', title_text = f'Unnormalized MAE: {testconv_Mae:.3f}')
(c) What are the predicted values of y for March 1st and March 2nd?¶
In [33]:
data_norm = sc.transform(y.reshape(-1, 1))
# for align with conv model
last_7_values = data_norm[-7:].reshape(1, 7)
pred_march1_norm = conv_model.predict(last_7_values)
pred_march1_conv = sc.inverse_transform(pred_march1_norm)
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step
In [34]:
last_pred_combine = np.concatenate([data_norm.flatten(), pred_march1_norm.flatten()])
updated_last_7_values = last_pred_combine[-7:].reshape(1, 7)
pred_march2_norm = GRU_model.predict(updated_last_7_values)
pred_march2_conv = sc.inverse_transform(pred_march2_norm)
print(f"Prediction for March 1st {pred_march1_GRU[0][0]:.3f}, prediction for March 2nd {pred_march2_conv[0][0]:.3f}")
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step Prediction for March 1st 66.749, prediction for March 2nd 72.813
Conclusion¶
In [35]:
print('Feed forward:')
print(f'MAE: {test_Mae:.3f}\nMarch 1st: {pred_march1_feedforward[0][0]:.3f} March 2nd: {pred_march2_feedforward[0][0]:.3f}\n')
print('Recurrent network:')
print(f'LSTM:\nMAE: {testLSTM_Mae:.3f}\nMarch 1st: {pred_march1_LSTM[0][0]:.3f} March 2nd: {pred_march2_LSTM[0][0]:.3f}\n')
print(f'GRU:\nMAE: {test_GRUMae:.3f}\nMarch 1st: {pred_march1_GRU[0][0]:.3f} March 2nd: {pred_march2_GRU[0][0]:.3f}\n')
print('Convnet 1D:')
print(f'MAE: {testconv_Mae:.3f}\nMarch 1st: {pred_march1_conv[0][0]:.3f} March 2nd: {pred_march2_conv[0][0]:.3f}')
Feed forward: MAE: 7.568 March 1st: 71.550 March 2nd: 72.606 Recurrent network: LSTM: MAE: 6.723 March 1st: 72.899 March 2nd: 72.956 GRU: MAE: 6.925 March 1st: 66.749 March 2nd: 72.250 Convnet 1D: MAE: 6.744 March 1st: 71.662 March 2nd: 72.813
Based on those model's result, we can see the LSTM has minimum test error compare the other models